sm<-read.table('ecol 562/sm003.txt', header=T, sep=',')
sm[1:14,]
#different nest sites have different numbers of observations
table(sm$strtno)
sm[sm$strtno==86,]
#There are missing values in the response
(1:nrow(sm))[is.na(sm$nesting2)]
sm[148:152,]
#indepedent logistic regression model
out.glm<-glm(nesting2~factor(year)+factor(period)+factor(deply)+rblval+ factor(year):factor(period)+ rblval*factor(period)+ size+I(size^2) family=binomial, data=sm)
summary(out.glm)
anova(out.glm,test='Chisq')

#gee package
library(gee)
#independence model
gee.in<-gee(nesting2~factor(year)+factor(period)+factor(deply)+rblval+ factor(year):factor(period)+ rblval*factor(period)+size+I(size^2),family=binomial, data=sm, id=strtno, corstr="independence", scale.fix=T)
names(gee.in)
summary(gee.in)
#exchangeable correlation
gee.ex<-gee(nesting2~factor(year)+factor(period)+factor(deply)+rblval+ factor(year):factor(period)+ rblval*factor(period)+size+I(size^2), family=binomial, data=sm, id=strtno, corstr="exchangeable", scale.fix=T)

names(gee.ex)
#estimated correlation matrix
round(gee.ex$working.correlation,3)
#scale parameter is fixed at 1
gee.ex$scale
#summary table
round(summary(gee.ex)$coefficients,3)
#p-value for reported z-statistic
round(2*(1-pnorm(abs(summary(gee.ex)$coefficients[,5]))),4)
summary(gee.in)
#independence model provides both naive and robust standard errors
round(summary(gee.in)$coefficients,3)

#function to calculate for Pan QIC for binary data
QIC.binom.gee<-function(model.R,model.independence)
{
#calculates binomial QAIC of Pan (2001)
#obtain trace term of QAIC
AIinverse<-solve(model.independence$naive.variance)
V.msR<-model.R$robust.variance
trace.term<-sum(diag(AIinverse%*%V.msR))
#estimated mean and observed values
mu.R<-model.R$fitted.values
y<-model.R$y
#scale for binary data
scale <- 1
#quasilikelihood for binomial model
quasi.R<-sum(y*log(mu.R/(1-mu.R))+log(1-mu.R))/scale
QIC<-(-2)*quasi.R + 2*trace.term
output <- c(QIC,trace.term)
names(output)<-c('QIC','CIC')
output
}
#QIC for exchangeable correlation
QIC.binom.gee(gee.ex,gee.in)
#QIC for independence model
QIC.binom.gee(gee.in,gee.in)

#compare robust and naive standard errors
sum(abs(summary(gee.in)$coefficients[,2]-summary(gee.in)$coefficients[,4]))
#there is no anova function in gee package
anova(gee.ex)

### geepack package #####
library(geepack)
#need a variable to uniquely identify order of the observations within a group
sm$wave<-paste(sm$year,sm$period,sep='.')
#convert character data to consecutive numbers
sm$wave<-as.numeric(factor(paste(sm$year,sm$period,sep='.')))

#exchangeable correlation
geeglm.ex<-geeglm(nesting2~factor(year)+factor(period)+factor(deply)+rblval+factor(year):factor(period)+ rblval*factor(period)+size+I(size^2),family=binomial, data=sm, id=strtno, corstr="exchangeable", scale.fix=T, waves=wave)
#independence correlation
geeglm.in<-geeglm(nesting2~factor(year)+factor(period)+factor(deply)+rblval+factor(year):factor(period)+ rblval*factor(period)+size+I(size^2),family=binomial, data=sm, id=strtno, corstr="independence", scale.fix=T, waves=wave)
#AR(1) correlation
geeglm.ar1<-geeglm(nesting2~factor(year)+factor(period)+factor(deply)+rblval+factor(year):factor(period)+ rblval*factor(period)+size+I(size^2), family=binomial, data=sm, id=strtno, corstr="ar1", scale.fix=T, waves=wave)
names(geeglm.ar1)
summary(geeglm.ar1)
names(geeglm.ar1$geese)
#coefficient estimates
geeglm.ar1$geese$beta
#variance-covariance matrix of estimates
dim(geeglm.ar1$geese$vbeta.naiv)
#naive standard errors
sqrt(diag(geeglm.ar1$geese$vbeta.naiv))
#z-statistic using naive standard errors
coef(geeglm.ar1)/sqrt(diag(geeglm.ar1$geese$vbeta.naiv))

#geepack has an anova function for multivariate Wald tests
anova(geeglm.ar1)

#Pan QIC for geeglm model
QIC.binom.geeglm<-function(model.geeglm,model.independence)
{
#calculates binomial QAIC of Pan (2001)
#obtain trace term of QAIC
AIinverse<-solve(model.independence$geese$vbeta.naiv)
V.msR<-model.geeglm$geese$vbeta
trace.term<-sum(diag(AIinverse%*%V.msR))
#estimated mean and observed values
mu.R<-model.geeglm$fitted.values
y<-model.geeglm$y
#scale for binary data
scale <- 1
#quasilikelihood for binomial model
quasi.R<-sum(y*log(mu.R/(1-mu.R))+log(1-mu.R))/scale
QIC<-(-2)*quasi.R + 2*trace.term
output <- c(QIC,trace.term)
names(output)<-c('QIC','CIC')
output
}

#compare QIC and CIC of three correlation models
sapply(list(geeglm.in,geeglm.ex,geeglm.ar1),function(x) QIC.binom.geeglm(x,geeglm.in))

### Mixed effects logistic regression: random intercepts
library(lme4)
lmerfit<-lmer(nesting2~factor(year)+factor(period)+factor(deply)+rblval+ factor(year):factor(period)+ rblval*factor(period)+size+I(size^2)+(1|strtno), family=binomial, data=sm)
slotNames(lmerfit)
lmerfit@deviance
summary(lmerfit)
#AIC of mixed effects model is lower
AIC(out.glm,lmerfit)

#compare coefficient estimates from GLM,GEE, and GLMM
out.comp<-round(cbind(coef(out.glm),coef(geeglm.ex),fixef(lmerfit)),3)
colnames(out.comp)<-c('GLM','GEE','GLMM')
out.comp

#draw subject-specific and marginal models versus rblval
#for Year 1999, period 1, area = 0

#marginal model
logit.ex<-function(x) {
lin.model<-coef(geeglm.ex)[1]+coef(geeglm.ex)[3]+coef(geeglm.ex)[8]*x
exp(lin.model)/(1+exp(lin.model)) }

#subject-specific model
logit.ss<-function(x,u=0) {
lin.model<-fixef(lmerfit)[1]+fixef(lmerfit)[3]+fixef(lmerfit)[8]*x+u
exp(lin.model)/(1+exp(lin.model)) }


range(sm$rblval)
#plot marginal model
curve(logit.ex,from=.05,to=2.3,xlab='rblval',ylab='Probability',ylim=c(0,1))
#add all subject-specific curves
sapply(ranef(lmerfit)[[1]][,1],function(y) curve(logit.ss(x,y),add=T,col='grey70',lty=2))->junk
#add typical subject-specific curve for u = 0
curve(logit.ss(x,0),add=T,col=1,lwd=2)
#add marginal model
curve(logit.ex,add=T,col=2,lwd=2)